# Load packages
library(sf)
library(xml2)
library(tidyverse)
library(tidybayes)
library(brms)
# Define genus level taxon groups (plus one family FAVI)
taxon_groups <- list(
PORI = c("PPOR", "PFUR", "PDIV", "PAST", "PORI"),
ORBI = c("OFAV", "OANN", "OFRA", "ORBI"),
FAVI = c("CNAT", "DLAB", "PSTR", "PCLI", "MARE", "FAVI"),
AGAR = c("AFRA", "AAGA", "AHUM", "ALAM", "AGAR"),
MADR = c("MAUR", "MSEN", "MDEC", "MPHA", "MADR"),
SOLE = c("SHYA", "SBOU", "SOLE"),
SCOL = c("SLAC", "SCUB", "SCOL"),
SIDE = c("SSID", "SRAD", "SIDE"),
MYCE = c("MFER", "MLAM", "MALI", "MYCE"),
OCUL = c("OROB", "ODIF", "OCUL")
)
# Convert to lookup tibble
taxon_lookup <- enframe(taxon_groups, name = "taxon_group", value = "taxon") %>%
unnest(taxon)
# Define juvenile family level taxon groups (following DRM survey convention)
taxon_groups_juv <- list(
MUSS = c("ISIN", "ISOP", "MANG", "MYCE", "SCOL", "MUSS"),
FAVI = c("FAVI", "FFRA", "MARE"),
MEAN = c("MMEA", "MEAN", "DCYL", "DSTO", "EFAS")
)
# Convert to lookup tibble
taxon_lookup_juv <- enframe(taxon_groups_juv, name = "taxon_group", value = "taxon") %>%
unnest(taxon)
# Order levels of Habitat Types
type_levels <- c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
"Outer Reef", "Aggregated Patch Reef")
type_labels <- c("NRC", "IR", "MR", "OR", "APR")
names(type_labels) <- type_levels
# Order survey datasets
dataset_levels <- c("dca", "tt21", "tt23", "drm")
dataset_labels <- c("2017—DCA", "2021—TT", "2023—TT", "2024—Shedd")
names(dataset_labels) <- dataset_levels
DCA site metadata
# Site metadata
# Read in site coordinates
dca_sitemd0 <- readxl::read_xlsx("data/2017_dca_recon/Recon_Site_Coordinates_Extracted.xlsx") %>%
janitor::clean_names()
# All sites have start and end coordinates...
# Tidy and Calculate midpoint per transect
dca_sitemd <- dca_sitemd0 %>%
mutate(
depth = abs(as.numeric(depth)),
across(c(latitude, longitude), as.numeric)
) %>%
group_by(site = transect) %>%
summarize(
latitude = mean(latitude, na.rm = TRUE),
longitude = mean(longitude, na.rm = TRUE),
depth = mean(depth, na.rm = TRUE),
.groups = "drop"
)
DCA coral data
# Read in survey data
dca0 <- readxl::read_xlsx("data/2017_dca_recon/Compiled_DCA_RECON_Belt_data.xlsx") %>%
janitor::clean_names()
dca <- dca0 %>%
select(1:18) %>%
rename(site = site_name) %>%
mutate(site = factor(site)) %>%
select(site, taxon = coral_species, max_width_cm = max_size_cm)
# Adjust/corrects species IDs
sort(unique(dca$taxon))
## [1] "AAGA" "ACER" "AFRA"
## [4] "AGA SP" "AHUM" "ALAM"
## [7] "CNAT" "CORAL" "Cup Coral"
## [10] "DLAB" "DSTO" "EFAS"
## [13] "LCUC" "MAD SP" "MADSP"
## [16] "MALI" "MARE" "MCAV"
## [19] "MDEC" "MLAM" "MMEA"
## [22] "MPHA" "Mycetophyllia spp." "MYCSP"
## [25] "OANN" "ODIF" "OFAV"
## [28] "OFAV\\" "PAST" "PCLI"
## [31] "PFUR" "PPOR" "PSTR"
## [34] "SBOU" "Scolymia Spp" "SCUB"
## [37] "Sid SP" "SID SP" "SID SP."
## [40] "SIDSP" "SINT" "SRAD"
## [43] "SSID"
dca <- dca %>%
mutate(taxon = case_when(
taxon == "AGA SP" ~ "AGAR",
taxon == "LCUC" ~ "HCUC",
taxon %in% c("MYCSP", "Mycetophyllia spp.") ~ "MYCE",
taxon == "OFAV\\" ~ "OFAV",
taxon %in% c("MAD SP", "MADSP") ~ "MADR",
taxon == "Scolymia Spp" ~ "SCOL",
taxon %in% c("SIDSP", "Sid SP", "SID SP.", "SID SP") ~ "SIDE",
TRUE ~ taxon
))
sort(unique(dca$taxon))
## [1] "AAGA" "ACER" "AFRA" "AGAR" "AHUM" "ALAM"
## [7] "CNAT" "CORAL" "Cup Coral" "DLAB" "DSTO" "EFAS"
## [13] "HCUC" "MADR" "MALI" "MARE" "MCAV" "MDEC"
## [19] "MLAM" "MMEA" "MPHA" "MYCE" "OANN" "ODIF"
## [25] "OFAV" "PAST" "PCLI" "PFUR" "PPOR" "PSTR"
## [31] "SBOU" "SCOL" "SCUB" "SIDE" "SINT" "SRAD"
## [37] "SSID"
# Filter out unidentified corals
dca <- dca %>%
filter(!taxon %in% c("CORAL", "Cup Coral"))
# Write long data to file
write_csv(dca, file = "data/processed/dca_2017_long.csv")
# Convert to count data
# Add explicit zeros for any taxon/size class missing at any site
dca_counts <- dca %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# # Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(dca_counts, file = "data/processed/dca_2017_counts.csv")
# Aggregate count data based on taxonomic groups defined above
dca_counts_ag <- dca_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
dca_counts_ag <- dca_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(dca_counts_ag, file = "data/processed/dca_2017_counts_ag.csv")
2021 TT site metadata
# site metadata
tt21_sitemd0 <- read_csv("data/2021_tt_recon_esa/midpoints_latlon.csv") %>%
janitor::clean_names()
tt21_sitemd <- tt21_sitemd0 %>%
mutate(name = str_remove(name, "A$")) %>%
select(site = name, longitude = lon, latitude = lat)
2021 TT coral data
# coral data - recon belt transects
tt21recon0 <- readxl::read_xlsx("data/2021_tt_recon_esa/Recon 30x1m Coral Belt Transect.xlsx") %>%
janitor::clean_names()
tt21recon <- tt21recon0 %>%
select(site, taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
mutate(taxon = toupper(taxon), site = factor(site)) %>%
mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# ESA survey data
tt21esa0 <- readxl::read_xlsx("data/2021_tt_recon_esa/ESA Coral Belt Transect.xlsx") %>%
janitor::clean_names()
sort(unique(tt21esa0$esa_id))
## [1] "0.0" "Acropora cervicornis" "ML QC Not ESA"
## [4] "Orbicella faveolata" "Orbicella franksi" "Outside Belt"
tt21esa <- tt21esa0 %>%
mutate(site = factor(site),
taxon = case_when(
esa_id == "Orbicella franksi" ~ "OFRA",
esa_id == "Orbicella faveolata" ~ "OFAV",
esa_id == "Acropora cervicornis" ~ "ACER")) %>%
filter(!is.na(taxon)) %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# Combine Recon and ESA survey data
tt21 <- bind_rows(tt21recon, tt21esa)
# Check taxa names
sort(unique(tt21recon$taxon))
## [1] "0" "AAGA"
## [3] "AFRAG" "ALAM"
## [5] "CNAT" "DLAB"
## [7] "DSTO" "EFAS"
## [9] "FFRAG" "JUVENILE-UNIDENTIFIABLE"
## [11] "MALC" "MANG"
## [13] "MCAV" "MDEC"
## [15] "MMEA" "MPHA"
## [17] "MYALI" "MYLAM"
## [19] "ODIF/OROB" "PAST"
## [21] "PDCLIV" "PDSTR"
## [23] "PHYLLANGIA AMERICANA" "PPOR"
## [25] "SBOU" "SCOLYMIA CUBENSIS"
## [27] "SCOLYMIA LACERA" "SINT"
## [29] "SRAD" "SSID"
## [31] "XESTO"
# Filter out unidentified OR NON-CORAL taxa
tt21 <- tt21 %>%
filter(!taxon %in% c("0", "JUVENILE-UNIDENTIFIABLE", "XESTO", "MALC"))
# Adjust/corrects species IDs
tt21 <- tt21 %>%
mutate(taxon = case_when(
taxon == "AFRAG" ~ "AFRA",
taxon == "FFRAG" ~ "FFRA",
taxon == "MYALI" ~ "MALI",
taxon == "MYFER" ~ "MFER",
taxon == "MYLAM" ~ "MLAM",
taxon == "ODIF/OROB" ~ "OCUL",
taxon == "PDCLIV" ~ "PCLI",
taxon == "PDSTR" ~ "PSTR",
taxon == "PHYLLANGIA AMERICANA" ~ "PAME",
taxon == "SCOLYMIA CUBENSIS" ~ "SCUB",
taxon == "SCOLYMIA LACERA" ~ "SLAC",
TRUE ~ taxon
))
sort(unique(tt21$taxon))
## [1] "AAGA" "ACER" "AFRA" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA" "MALI"
## [11] "MANG" "MCAV" "MDEC" "MLAM" "MMEA" "MPHA" "OCUL" "OFAV" "OFRA" "PAME"
## [21] "PAST" "PCLI" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt21, file = "data/processed/tt_2021_long.csv")
# Count
# Add explicit zeros for any taxon/size class missing at any site
tt21_counts <- tt21 %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(tt21_counts, file = "data/processed/tt_2021_counts.csv")
# Aggregate count data based on taxonomic groups defined above
tt21_counts_ag <- tt21_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
tt21_counts_ag <- tt21_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(tt21_counts_ag, file = "data/processed/tt21_counts_ag.csv")
TT site metadata
# Site metadata
tt_sitemd <- readxl::read_xlsx("data/2023_tt_impact/Impact site tracking.xlsx", skip = 1) %>%
janitor::clean_names()
tt_sitemd <- tt_sitemd %>%
mutate(site = transect_name,
latitude = as.numeric(actual_start_y),
longitude = as.numeric(actual_start_x)) %>%
select(site, latitude, longitude)
# Many sites missing coords in sheet.... whats up with that
tt_sitemd <- drop_na(tt_sitemd, longitude)
TT coral data
tt0 <- readxl::read_xlsx("data/2023_tt_impact/Impact Raw Data 05 31 2024.xlsx") %>%
janitor::clean_names()
tt <- tt0 %>%
select(site = transect_name, depth_ft_start,
taxon = id_abbrev, coral_length_cm, coral_width_cm) %>%
filter(taxon != "Xesto") %>%
mutate(taxon = toupper(taxon)) %>%
mutate(across(c(coral_length_cm, coral_width_cm), as.numeric)) %>%
mutate(site = factor(site))
tt <- tt %>%
mutate(max_width_cm = pmax(coral_length_cm, coral_width_cm)) %>%
select(site, taxon, max_width_cm)
# Check taxa names
sort(unique(tt$taxon))
## [1] "?" "AAGA" "ACER" "AFRA" "AFRAG" "ALAM"
## [7] "ASP" "ASP." "CNAT" "DLAB" "DSTO" "EFAS"
## [13] "FFRA" "HCUC" "ID-ABBREV" "MCAV" "MCAV?" "MDEC"
## [19] "MHEARD" "MMEA" "MSEN" "MSP." "MUSSID" "MYALI"
## [25] "MYFER" "MYLAM" "NONE" "OFAV" "OFR" "OFRA"
## [31] "OROB" "PAME" "PAST" "PCLI" "PCLI?" "PDIV"
## [37] "PPOR" "PSP" "PSP." "PSTR" "SBOU" "SCUB"
## [43] "SHYA" "SINT" "SLAC" "SRAD" "SSID" "SSP."
## [49] "STOK"
# Filter out unidentified taxa
tt <- tt %>%
filter(!taxon %in% c("?", "ID-ABBREV", "NONE", "MHEARD"))
# Adjust/corrects species IDs
tt <- tt %>%
mutate(taxon = case_when(
taxon == "AFRAG" ~ "AFRA",
taxon %in% c("ASP", "ASP.") ~ "AGAR",
taxon == "MCAV?" ~ "MCAV",
taxon == "MSP." ~ "MADR",
taxon == "MUSSID" ~ "MUSS",
taxon == "MYALI" ~ "MALI",
taxon == "MYFER" ~ "MFER",
taxon == "MYLAM" ~ "MLAM",
taxon == "OFR" ~ "OFRA",
taxon == "PCLI?" ~ "PCLI",
taxon %in% c("PSP", "PSP.") ~ "PORI",
taxon == "SSP." ~ "SIDE",
taxon == "STOK" ~ "DSTO",
TRUE ~ taxon
))
sort(unique(tt$taxon))
## [1] "AAGA" "ACER" "AFRA" "AGAR" "ALAM" "CNAT" "DLAB" "DSTO" "EFAS" "FFRA"
## [11] "HCUC" "MADR" "MALI" "MCAV" "MDEC" "MFER" "MLAM" "MMEA" "MSEN" "MUSS"
## [21] "OFAV" "OFRA" "OROB" "PAME" "PAST" "PCLI" "PDIV" "PORI" "PPOR" "PSTR"
## [31] "SBOU" "SCUB" "SHYA" "SIDE" "SINT" "SLAC" "SRAD" "SSID"
# Write long data to file
write_csv(tt, file = "data/processed/tt_2024_long.csv")
# Count
# Add explicit zeros for any taxon/size class missing at any site
tt_counts <- tt %>%
mutate(class = ifelse(max_width_cm >= 4, ">4cm", "<4cm")) %>%
count(site, taxon, class) %>%
complete(site, taxon, class = c(">4cm", "<4cm"), fill = list(n = 0)) %>%
# Don't create zeros for MEAN/MUSS/FAVI adults, since these IDs only applied to juv
filter(!(taxon %in% c("MEAN", "MUSS", "FAVI") & class == ">4cm" & n == 0))
write_csv(tt_counts, file = "data/processed/tt_2024_counts.csv")
# Aggregate count data based on taxonomic groups defined above
tt_counts_ag <- tt_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
# Further aggregate juvenile counts to family (following DRM methods)
tt_counts_ag <- tt_counts_ag %>%
left_join(taxon_lookup_juv, by = "taxon") %>%
mutate(
taxon = if_else(class == "<4cm" & !is.na(taxon_group), taxon_group, taxon)
) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(tt_counts_ag, file = "data/processed/tt_counts_ag.csv")
Shedd site metadata
# Site metadata
drm_sitemd <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
janitor::clean_names() %>%
mutate(site = as.character(drm_site_id)) %>%
select(site, latitude = lat, longitude = lon) %>%
mutate(depth = NA)
Shedd coral data
# Adult coral data from main DRM surveys
#adults0 <- read_csv("data/2024_shedd_drm/DRM_broward_corals.csv")
#adults0 %>% filter(team == "Shedd Aquarium")
# Most sites were included in main DRM database for 2024 -- Import these
adultst1t2 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect1and2_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
select(site, transect_num, species, width, height)
adultst3t4 <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_RawCoralDataTransect3and4_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium") %>%
select(site, transect_num, species, width, height)
# 9 of our PEV sites were removed from DRM database to avoid oversaturating the ares -- Import these separately
removedt1t2 <- readxl::read_xlsx(
"data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T1-T2") %>%
janitor::clean_names() %>%
select(site, transect_num, species, width, height)
removedt3t4 <- readxl::read_xlsx(
"data/2024_shedd_drm/2024_DRM_Broward_RemovedSites_T1-T4_Shedd.xlsx", sheet = "Removed Sites T3-T4") %>%
janitor::clean_names() %>%
select(site, transect_num, species, width, height)
# Combine all adult coral data for Shedd DRM surveys at PEV
adults0 <- bind_rows(adultst1t2, adultst3t4, removedt1t2, removedt3t4)
# Convert adult data to long format
adults_long <- adults0 %>%
mutate(max_width_cm = pmax(width, height, na.rm = TRUE)) %>%
mutate(max_width_cm = as.character(max_width_cm)) %>%
mutate(site = str_remove(site, "^AA")) %>%
select(site, transect_num, taxon = species, max_width_cm) %>%
drop_na(taxon) # DROPS when taxon is blank, this is when no corals >4cm were observed
# Import juvenile counts from main DRM dataset
juv <- readxl::read_xlsx("data/2024_shedd_drm/2024ANU_JuvenileCoralData_Shedd.xlsx") %>%
janitor::clean_names() %>%
filter(subregion == "Broward-Miami", team == "Shedd Aquarium")
# Import juvenile counts from sites that were removed from main DRM dataset
removed_juv <- readxl::read_xlsx("data/2024_shedd_drm/Shedd_removed_sites_Juveniles_2024.xlsx") %>%
janitor::clean_names() %>%
# Missing values in count data should be zero counts (unique to this datasheet from FWC)
mutate(across(ends_with("_ct"), ~replace_na(., 0)))
# Combine juvenile data
juv0 <- bind_rows(juv, removed_juv) %>%
mutate(site = str_remove(site, "^AA")) %>%
select(site, transect_num, ends_with("ct")) %>%
rename(MCAV = montastraea_ct, MUSS = mussinae_ct, FAVI = faviinae_ct, MEAN = meandrinidae_ct)
# Convert juvenile data to long format
juv_long <- juv0 %>%
pivot_longer(c(MUSS, FAVI, MEAN, MCAV), names_to = "taxon", values_to = "n") %>%
mutate(max_width_cm = "<4") %>%
uncount(n)
# Other juvenile taxa counts from Transects 1 and 2 (DRM 'bonus data')
t1t2bonus <- read_csv("data/2024_shedd_drm/T1_T2_bonus_data.csv") %>%
janitor::clean_names() %>%
mutate(site = replace_na(site, "NA")) %>% # Because one site is called "NA"
mutate(transect_num = parse_number(transect))
t1t2juv <- t1t2bonus %>%
select(site, transect_num, starts_with("small")) %>%
rename_with(~ toupper(gsub("^small_", "", .x)), starts_with("small_"))
t1t2juv_long <- t1t2juv %>%
pivot_longer(3:10, names_to = "taxon", values_to = "n") %>%
mutate(max_width_cm = "<4") %>%
uncount(n)
# Replace site names in t1t2 bonus data with the correct DRM site ID
penipsites <- readxl::read_xlsx("data/2024_shedd_drm/site_metadata.xlsx") %>%
janitor::clean_names()
t1t2juv_long_updated <- t1t2juv_long %>%
left_join(penipsites %>% select(site, drm_site_id), by = "site") %>%
mutate(site = as.character(drm_site_id)) %>%
select(-drm_site_id)
# Combine all data
drm_long <- bind_rows(adults_long, juv_long, t1t2juv_long_updated) %>%
mutate(team = "Shedd Aquarium")
# Check species names
sort(unique(drm_long$taxon))
## [1] "AAGA" "ACER" "AFRA" "CNAT" "DLAB" "DSTO" "EFAS" "FAVI" "FFRA" "MALI"
## [11] "MAUR" "MCAV" "MDEC" "MEAN" "MMEA" "MUSS" "OANN" "OFAV" "OFRA" "PAST"
## [21] "PCLI" "PFUR" "PPOR" "PSTR" "SBOU" "SCUB" "SINT" "SRAD" "SSID"
write_csv(drm_long, file = "data/processed/drm_2024_long.csv")
# COUNT based on rules
# ✅ Updated Rules Summary for counting from DRM/Shedd data:
# Juvenile taxa (searched for in <4cm size class only):
# "MEAN", "MUSS", "FAVI"
# → these should only ever appear in <4cm, never >4cm, and should not be zero-filled for adults.
# Other juvenile-capable taxa:
# "MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR"
# → these can be counted in both >4cm and <4cm, but only in <4cm if juveniles were searched on that transect and team.
# Transect-based search rules still apply:
# Transects 1 & 2: all adult taxa always searched. Juvenile search depends on team:
# "Shedd Aquarium" → all juvenile taxa above searched
# others → only MEAN, MUSS, FAVI, MCAV
# Transects 3 & 4:
# only subset of adult taxa searched (adult_taxa_t3t4)
# only juveniles: MEAN, MUSS, FAVI, MCAV
# Step 1: Define size classes
drm_classed <- drm_long %>%
mutate(class = case_when(as.numeric(max_width_cm) >= 4 ~ ">4cm",
max_width_cm == "<4" ~ "<4cm"))
# Step 2: Define species sets
all_taxa <- unique(drm_classed$taxon)
adult_taxa_t3t4 <- c("CNAT", "DSTO", "DLAB", "MMEA", "MANG", "MALI",
"MFER", "MLAM", "PCLI", "PSTR")
juv_only_taxa <- c("MEAN", "MUSS", "FAVI")
juv_both_taxa <- c("MCAV", "SSID", "SRAD", "PAST", "PPOR", "SINT", "SBOU", "AAGA", "MAUR")
all_juv_taxa <- c(juv_only_taxa, juv_both_taxa)
# Step 3: Build search grid per site × transect × team
search_grid <- drm_classed %>%
distinct(site, transect_num, team) %>% # if multiple teams in data, remove value for team
mutate(
searched_taxa_class = pmap(list(transect_num, team), function(transect, team) {
# Helper: define juv taxa allowed for this transect/team
juv_taxa <- if (transect %in% c(1, 2)) {
if (team == "Shedd Aquarium") { # Shedd searched for other juv taxa on T1 and T2, other DRM survey teams did not
all_juv_taxa
} else {
c(juv_only_taxa, "MCAV")
}
} else {
c(juv_only_taxa, "MCAV")
}
# Adults always searched in 1 & 2, subset in 3 & 4
adult_taxa <- if (transect %in% c(1, 2)) {
setdiff(all_taxa, juv_only_taxa) # exclude juv-only taxa
} else {
adult_taxa_t3t4
}
# Build grid
bind_rows(
expand_grid(taxon = adult_taxa, class = ">4cm"),
expand_grid(taxon = juv_taxa, class = "<4cm")
)
})
) %>%
unnest(searched_taxa_class)
# Step 4: Count observations
counts <- drm_classed %>%
group_by(site, transect_num, team, taxon, class) %>%
summarize(n = n(), .groups = "drop")
# Step 5: Join with grid and fill in zeros where appropriate
final_counts <- search_grid %>%
left_join(counts, by = c("site", "transect_num", "team", "taxon", "class")) %>%
mutate(n = replace_na(n, 0))
write_csv(final_counts, file = "data/processed/drm_2024_counts.csv")
# AGGREGATE COUNT DATA
# Aggregate taxa
final_counts_ag <- final_counts %>%
left_join(taxon_lookup, by = "taxon") %>%
mutate(taxon = coalesce(taxon_group, taxon)) %>%
select(-taxon_group) %>%
group_by(across(-n)) %>%
summarize(n = sum(n), .groups = "drop")
write_csv(final_counts_ag, file = "data/processed/drm_2024_counts_ag.csv")
# --- STEP 1: Load KML Polygons ---
polygons <- st_read("data/Habitat classifications.kml") # update path as needed
## Reading layer `Habitat classifications' from data source
## `/Users/rosscunning/Projects/PENIP/data/Habitat classifications.kml'
## using driver `KML'
## Simple feature collection with 190 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XYZ
## Bounding box: xmin: -80.11618 ymin: 25.97477 xmax: -80.06143 ymax: 26.25943
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# --- STEP 2: Extract Attributes from HTML Description ---
extract_attrs <- function(desc) {
if (is.na(desc) || desc == "") {
return(tibble(Habitat = NA, Type = NA, Modifier = NA, Region = NA, Type2 = NA))
}
html <- read_html(desc)
rows <- xml_find_all(html, "//table//table//tr")
keys <- rows %>% xml_find_all(".//td[1]") %>% xml_text(trim = TRUE)
vals <- rows %>% xml_find_all(".//td[2]") %>% xml_text(trim = TRUE)
n <- min(length(keys), length(vals))
named_vals <- set_names(vals[1:n], keys[1:n])
tibble(
Habitat = named_vals[["Habitat"]],
Type = named_vals[["Type"]],
Modifier = named_vals[["Modifier"]],
Region = named_vals[["Region"]],
Type2 = named_vals[["Type2"]]
)
}
# Apply function and combine with spatial geometries
attrs <- map_dfr(polygons$Description, extract_attrs)
polygons_clean <- bind_cols(polygons %>% select(-Description), attrs)
# --- STEP 3: Prepare Site Coordinate Data ---
# Get all site coordinates, and assign north and south
allsitemd <- bind_rows(.id = "dataset",
drm = drm_sitemd,
dca = dca_sitemd,
tt23 = tt_sitemd,
tt21 = tt21_sitemd
) %>%
mutate(dir = if_else(latitude > 26.093570, "N", "S"))
points <- st_as_sf(allsitemd, coords = c("longitude", "latitude"), crs = 4326)
# --- STEP 4: Validate Geometry and Match CRS ---
polygons_clean <- polygons_clean %>%
st_zm(drop = TRUE, what = "ZM") %>%
st_make_valid() %>%
st_transform(st_crs(points))
sf_use_s2(FALSE) # prevent s2 geometry issues
# --- STEP 5: Spatial Join ---
joined <- st_join(points, polygons_clean, join = st_within)
joined_df <- joined %>%
mutate(longitude = st_coordinates(.)[,1],
latitude = st_coordinates(.)[,2]) %>%
st_drop_geometry()
allsitemd <- joined_df
# Visualize habitat classifications
(polyplot <- polygons_clean %>%
#filter(Type != "Sand") %>%
ggplot() +
geom_sf(aes(fill = Type), color = "black", size = 0.2, alpha = 0.6) +
scale_fill_brewer(palette = "Set3", na.value = "gray80") +
theme_minimal() +
labs(title = "Habitat Polygons Colored by Type", fill = "Type") +
theme(legend.position = "right") +
xlim(-80.11, -80.079) +
ylim(26.0675, 26.11))
# 'Sand' overlaps some of the other polygons instead of just surrounding them...
# If a point is classified as Sand AND something else, remove Sand...
multiclass <- allsitemd %>%
group_by(site) %>%
filter(n() > 1)
allsitemd_clean <- allsitemd %>%
group_by(site) %>%
filter(!(Type == "Sand" & n() > 1)) %>%
ungroup()
# Select sites in Nearshore Ridge Complex, Inner Reef, and Middle Reef, Outer Reef and Aggregated Patch Reef
selected <- allsitemd_clean %>%
filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef",
"Artificial", "Outer Reef", "Aggregated Patch Reef"))
# Plot selected sites
polyplot +
geom_point(data = selected,
aes(x = longitude, y = latitude, shape = dataset),
inherit.aes = FALSE, alpha = 0.6) +
scale_shape_manual(values = c(2, 3, 4, 5))
(dca_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "dca"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2017 — DCA Survey"))
(tt21_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "tt21"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2021 — Tetra Tech Survey"))
(tt23_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "tt23"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2023 — Tetra Tech Survey"))
(drm_plot <- polyplot +
geom_point(
data = filter(selected, dataset == "drm"),
aes(x = longitude, y = latitude), pch = 13, inherit.aes = FALSE, alpha = 0.6) +
labs(title = "2024 — Shedd Survey"))
# Combine all aggregated count data and filter for selected sites
df <- bind_rows(.id = "dataset",
drm = final_counts_ag,
dca = dca_counts_ag,
tt23 = tt_counts_ag,
tt21 = tt21_counts_ag
) %>%
filter(site %in% selected$site) %>%
select(dataset, site, transect_num, taxon, class, n) %>%
droplevels() %>%
left_join(allsitemd_clean)
# Add transect area information
df <- df %>%
mutate(transect_num = if_else(is.na(transect_num), 1, transect_num)) %>%
mutate(transect_area_m2 = case_when(
dataset == "drm" ~ 10, # DRM belt transects were 10m
dataset == "tt23" ~ 20, # TT23 belt transects were 20m
dataset == "tt21" ~ 30, # TT21 belt transects were 30m
dataset == "dca" ~ 30 # DCA belt transects were 30m
))
Can ‘Artificial’ be aggregated with ‘Nearshore Ridge Complex’?
‘Artificial’ is only present in DCA and minorly in TT – but absent from Shedd.
It is in close spatial proximity to Nearshore Ridge Complex – can these be combined?
Test for differences in coral density in DCA survey
# Subset DCA data
dcadf <- dca_counts_ag %>%
left_join(allsitemd_clean) %>%
filter(Type %in% c("Nearshore Ridge Complex", "Inner Reef", "Middle Reef", "Artificial"))
# Fit a Negative Binomial GLM
mod_nb <- MASS::glm.nb(n ~ Type, data = dcadf)
# Generate new data only for existing taxon-size class combinations
newdata_1 <- dcadf %>%
distinct(Type)
# Get predicted values & standard errors (log scale)
preds_nb <- predict(mod_nb, newdata_1, type = "link", se.fit = TRUE)
# Compute both total coral density & taxon-size class-specific densities in one step
results_nb <- newdata_1 %>%
mutate(
fit = exp(preds_nb$fit), # Convert fitted values to response scale
fit_se = exp(preds_nb$fit) * preds_nb$se.fit, # Convert SE using the Delta Method
fit_var = (fit * preds_nb$se.fit)^2, # Variance propagation
fit_lower = exp(preds_nb$fit - 1.96 * preds_nb$se.fit), # Lower CI
fit_upper = exp(preds_nb$fit + 1.96 * preds_nb$se.fit) # Upper CI
)
# Compute total coral density + confidence intervals
total_ci_nb <- results_nb %>%
group_by(Type) %>%
summarize(
total_density = sum(fit),
total_se = sqrt(sum(fit_var)),
lower_95CI = exp(log(total_density) - 1.96 * (total_se / total_density)),
upper_95CI = exp(log(total_density) + 1.96 * (total_se / total_density))
)
knitr::kable(total_ci_nb)
| Type | total_density | total_se | lower_95CI | upper_95CI |
|---|---|---|---|---|
| Artificial | 1.7580645 | 0.2269451 | 1.3650635 | 2.264210 |
| Inner Reef | 1.3680352 | 0.1696949 | 1.0727781 | 1.744555 |
| Middle Reef | 0.8573201 | 0.0998316 | 0.6823733 | 1.077120 |
| Nearshore Ridge Complex | 1.9869432 | 0.1764332 | 1.6695542 | 2.364669 |
Highly overlapping confidence intervals for Artifical and Nearshore Ridge Complex, so no difference in coral density.
Test for differences in community composition in DCA survey
library(vegan)
# 1. Create a wide community matrix: rows = site x Type, columns = taxa
comm_matrix <- dcadf %>%
group_by(site, Type, taxon) %>%
summarize(total_n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = taxon, values_from = total_n, values_fill = 0)
# 2. Extract community matrix and metadata
comm_data <- comm_matrix %>% select(-site, -Type)
site_info <- comm_matrix %>% select(site, Type)
# 3. Run NMDS (k = 2 dimensions is standard)
set.seed(123) # for reproducibility
nmds <- metaMDS(comm_data, distance = "bray", k = 2, trymax = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2267843
## Run 1 stress 0.2276397
## Run 2 stress 0.235479
## Run 3 stress 0.2268499
## ... Procrustes: rmse 0.006015082 max resid 0.04502679
## Run 4 stress 0.2325294
## Run 5 stress 0.2437078
## Run 6 stress 0.2276398
## Run 7 stress 0.2356379
## Run 8 stress 0.2315451
## Run 9 stress 0.2297249
## Run 10 stress 0.2314156
## Run 11 stress 0.2374012
## Run 12 stress 0.235405
## Run 13 stress 0.2327499
## Run 14 stress 0.2314157
## Run 15 stress 0.2267804
## ... New best solution
## ... Procrustes: rmse 0.001160623 max resid 0.007358882
## ... Similar to previous best
## Run 16 stress 0.2268231
## ... Procrustes: rmse 0.002309085 max resid 0.018594
## Run 17 stress 0.22764
## Run 18 stress 0.2268497
## ... Procrustes: rmse 0.006092397 max resid 0.04511697
## Run 19 stress 0.2352389
## Run 20 stress 0.2334834
## *** Best solution repeated 1 times
# 4. Prepare data for plotting
scores_df <- scores(nmds)$sites %>%
bind_cols(site_info)
# 5. Plot NMDS with ggplot2
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Type)) +
geom_point(size = 3, alpha = 0.8) +
theme_minimal() +
labs(title = "NMDS of Coral Community Structure by Habitat Type",
color = "Habitat Type")
High degree of similarity between Artificial and Nearshore Ridge Complex coral communities.
2.2.3. Combine ‘Artificial’ with ‘Nearshore Ridge Complex’
# Based on these results, combine "Artificial" with "Nearshore Ridge Complex"
df[df$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"
Remove lowest abundance coral taxa
These will be problematic for statistical models
# Drop taxa with very few observations
sppcounts <- df %>%
group_by(taxon) %>%
summarize(total = sum(n), .groups = "drop") %>%
arrange(total)
sppcounts
## # A tibble: 22 × 2
## taxon total
## <chr> <int>
## 1 MANG 0
## 2 FFRA 1
## 3 HCUC 1
## 4 PAME 1
## 5 SCOL 2
## 6 OCUL 3
## 7 ORBI 26
## 8 MYCE 30
## 9 EFAS 35
## 10 MUSS 37
## # ℹ 12 more rows
dff <- df %>%
filter(taxon %in% filter(sppcounts, total >= 5)$taxon) %>%
mutate(Type = factor(Type, levels = type_levels))
Taxa with less than 5 total observations were filtered out, which included: HCUC, MANG, PAME, FFRA, OCUL, SCOL
Predictors: dataset, habitat Type, direction from channel, taxon
# Get total counts for each taxon at each site
dfftaxon <- dff %>%
group_by(dataset, Type, dir, site, transect_num, transect_area_m2, taxon) %>%
summarize(n = sum(n)) %>%
ungroup()
# Remove any taxa not observed in a given dataset / habitat Type ?????
dfftaxon_trimmed <- dfftaxon %>%
group_by(dataset, Type, dir, taxon) %>%
filter(any(n > 0)) %>%
ungroup()
# SUPER MODEL
# mod_nb <- brm(
# bf(n ~ dataset * Type * dir * taxon + offset(log(transect_area_m2))),
# family = negbinomial(),
# data = dfftaxon_trimmed,
# prior = c(prior(normal(0, 2), class = "b"), # Weak prior on coefficients
# prior(normal(0, 5), class = "Intercept"), # Weak prior on intercept
# prior(exponential(1), class = "shape")), # Reasonable prior for NB dispersion
# chains = 4,
# cores = 4,
# threads = threading(5),
# iter = 1000, warmup = 500,
# thin = 2,
# control = list(adapt_delta = 0.9, max_treedepth = 12),
# backend = "cmdstanr"
# )
# saveRDS(mod_nb, file = "data/processed/mod_nb.rds")
mod_nb <- readRDS("data/processed/mod_nb.rds")
# 1. Create newdata grid (1 m² for standardization)
newdata <- dfftaxon_trimmed %>%
distinct(dataset, Type, dir, taxon) %>%
mutate(transect_area_m2 = 1)
# 2. Get fitted values manually by computing summary statistics across all draws
posterior_draws <- add_epred_draws(mod_nb, newdata = newdata) %>%
mutate(Type = factor(Type, levels = type_levels),
dataset = factor(dataset, levels = dataset_levels))
# Compute and plot total coral abundance by dataset, habitat Type, and direction from channel
total_abund <- posterior_draws %>%
group_by(.draw, dataset, Type, dir) %>%
summarize(total_epred = sum(.epred), .groups = "drop") %>%
group_by(dataset, Type, dir) %>%
summarize(
fit_mean = mean(total_epred),
fit_sd = sd(total_epred),
fit_lower = quantile(total_epred, 0.025),
fit_upper = quantile(total_epred, 0.975),
.groups = "drop"
)
ggplot(total_abund, aes(x = dataset, y = fit_mean, color = dir)) +
facet_grid(~ Type, labeller = as_labeller(type_labels)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
position = position_dodge(width = 0.5)) +
scale_x_discrete(labels = dataset_labels) +
theme(axis.text.x = element_text(angle = 90))
# Any N-S differences in total coral abundance?
total_NS_diff <- posterior_draws %>%
ungroup() %>%
group_by(.draw, dataset, Type, dir) %>%
summarize(total_epred = sum(.epred), .groups = "drop") %>%
pivot_wider(names_from = dir, values_from = total_epred) %>%
filter(!is.na(N), !is.na(S)) %>% # make sure both are present
mutate(diff = N - S) # or S - N depending on desired contrast
total_NS_summ <- total_NS_diff %>%
group_by(dataset, Type) %>%
summarize(
mean_diff = mean(diff),
lower_95 = quantile(diff, 0.025),
upper_95 = quantile(diff, 0.975),
p_gt_0 = mean(diff > 0),
p_lt_0 = mean(diff < 0),
sig = ifelse(lower_95 > 0 | upper_95 < 0, "yes", "no"),
.groups = "drop"
)
total_NS_summ
## # A tibble: 16 × 8
## dataset Type mean_diff lower_95 upper_95 p_gt_0 p_lt_0 sig
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 dca Nearshore Ridge Comp… -0.401 -0.925 0.106 0.062 0.938 no
## 2 dca Inner Reef -0.506 -1.23 0.0934 0.044 0.956 no
## 3 dca Middle Reef -0.328 -0.684 -0.0188 0.017 0.983 yes
## 4 dca Outer Reef -0.491 -0.823 -0.174 0.001 0.999 yes
## 5 dca Aggregated Patch Reef 0.109 -0.440 0.721 0.642 0.358 no
## 6 tt21 Nearshore Ridge Comp… -1.96 -2.91 -1.33 0 1 yes
## 7 tt21 Middle Reef -0.859 -2.37 0.558 0.089 0.911 no
## 8 tt21 Outer Reef -0.417 -2.19 1.43 0.305 0.695 no
## 9 tt21 Aggregated Patch Reef -0.373 -1.69 1.03 0.226 0.774 no
## 10 tt23 Nearshore Ridge Comp… 0.711 0.282 1.20 0.998 0.002 yes
## 11 tt23 Inner Reef 0.130 -0.973 1.39 0.572 0.428 no
## 12 tt23 Middle Reef -0.390 -1.00 0.136 0.09 0.91 no
## 13 tt23 Outer Reef -0.878 -1.70 -0.115 0.014 0.986 yes
## 14 drm Nearshore Ridge Comp… 1.45 0.486 2.63 0.999 0.001 yes
## 15 drm Inner Reef -0.108 -0.950 0.831 0.386 0.614 no
## 16 drm Middle Reef 1.03 -0.228 2.32 0.953 0.047 no
# 1. Sum predicted values across taxa (total coral density per draw × dataset × Type × dir)
draws_total <- posterior_draws %>%
mutate(dataset = as.character(dataset)) %>%
group_by(.draw, dataset, Type, dir) %>%
summarize(total_epred = sum(.epred), .groups = "drop")
# 2. Self-join to compare dataset pairs within each draw × Type × dir
pairwise_total <- draws_total %>%
rename(dataset1 = dataset, epred1 = total_epred) %>%
inner_join(
draws_total %>% rename(dataset2 = dataset, epred2 = total_epred),
by = c(".draw", "Type", "dir")
) %>%
filter(dataset1 < dataset2) %>%
mutate(diff = epred1 - epred2)
# 3. Summarize posterior contrasts
summary_total_contrasts <- pairwise_total %>%
group_by(Type, dir, dataset1, dataset2) %>%
summarize(
mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
prob_diff = mean(diff > 0),
.groups = "drop"
)
# 4. Optional: filter strong differences
summary_total_contrasts %>%
filter(prob_diff < 0.01 | prob_diff > 0.99) %>%
arrange(Type, dir)
## # A tibble: 18 × 8
## Type dir dataset1 dataset2 mean_diff lower upper prob_diff
## <fct> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Nearshore Ridge C… N dca drm -2.10 -3.26 -1.18 0
## 2 Nearshore Ridge C… N dca tt21 1.70 1.36 2.09 1
## 3 Nearshore Ridge C… N drm tt21 3.80 2.96 4.89 1
## 4 Nearshore Ridge C… N drm tt23 1.94 1.01 3.10 1
## 5 Nearshore Ridge C… N tt21 tt23 -1.85 -2.27 -1.52 0
## 6 Nearshore Ridge C… S dca tt23 0.959 0.493 1.45 1
## 7 Nearshore Ridge C… S drm tt23 1.20 0.744 1.74 1
## 8 Nearshore Ridge C… S tt21 tt23 0.814 0.152 1.79 0.991
## 9 Inner Reef N dca drm -1.09 -1.95 -0.394 0
## 10 Inner Reef N dca tt23 -0.926 -2.06 -0.0922 0.007
## 11 Middle Reef N dca drm -1.84 -2.95 -1.06 0
## 12 Middle Reef N dca tt23 -0.621 -1.05 -0.273 0
## 13 Middle Reef N drm tt23 1.21 0.364 2.34 0.999
## 14 Middle Reef S dca tt23 -0.683 -1.28 -0.178 0.004
## 15 Outer Reef N dca tt21 -1.68 -3.08 -0.770 0
## 16 Outer Reef N dca tt23 -0.784 -1.28 -0.340 0.001
## 17 Outer Reef S dca tt21 -1.61 -3.26 -0.491 0
## 18 Outer Reef S dca tt23 -1.17 -1.95 -0.542 0
fitted_taxon <- posterior_draws %>%
group_by(dataset, Type, dir, taxon) %>%
summarize(fit_mean = mean(.epred), # Posterior mean (expected value)
fit_sd = sd(.epred), # Posterior standard deviation
fit_lower = quantile(.epred, 0.025), # 2.5% quantile (lower CI)
fit_upper = quantile(.epred, 0.975)) # 97.5% quantile (upper CI)
# Plot
ggplot(fitted_taxon, aes(y = fit_mean, x = Type, color = dir, shape = dataset,
group = interaction(dataset, dir))) +
facet_wrap(taxon ~ ., scales = "free_x") +
geom_point(position = position_dodge(width = 0.5)) +
geom_line(position = position_dodge(width = 0.5), alpha = 0.5) +
geom_errorbar(aes(ymin = fit_lower, ymax = fit_upper), width = 0,
position = position_dodge(width = 0.5), lwd = 0.25) +
scale_y_log10() +
scale_x_discrete(labels = type_labels) +
labs(x = "Habitat Type", y = "Density (per m2)")
# Any N-S differences in taxon abundance within habitat Types/datasets?
#### Pivot wide so you can calculate N vs S difference per draw
taxon_NS_diff <- posterior_draws %>%
ungroup() %>%
select(.draw, dataset, Type, dir, taxon, .epred) %>%
pivot_wider(names_from = dir, values_from = .epred) %>%
filter(!is.na(N), !is.na(S)) %>% # ensure both directions exist for the draw
mutate(diff = S - N) # or N - S depending on interpretation
taxon_NS_summ <- taxon_NS_diff %>%
group_by(dataset, Type, taxon) %>%
summarize(
mean_diff = mean(diff),
lower_95 = quantile(diff, 0.025),
upper_95 = quantile(diff, 0.975),
p_gt_0 = mean(diff > 0),
p_lt_0 = mean(diff < 0),
sig = ifelse(lower_95 > 0 | upper_95 < 0, "yes", "no"),
.groups = "drop"
)
taxon_NS_summ %>%
filter(sig == "yes") %>%
mutate(greater = if_else(mean_diff > 0, "N", "S")) %>%
group_by(dataset, Type, greater) %>%
summarize(taxa = paste(taxon, collapse = ",")) %>%
arrange(Type, greater)
## # A tibble: 10 × 4
## # Groups: dataset, Type [8]
## dataset Type greater taxa
## <fct> <fct> <chr> <chr>
## 1 dca Nearshore Ridge Complex N DSTO,MCAV,MEAN,SINT
## 2 drm Nearshore Ridge Complex N MCAV
## 3 dca Nearshore Ridge Complex S ACER,FAVI,SOLE
## 4 tt23 Nearshore Ridge Complex S FAVI,PORI,SIDE,SOLE
## 5 drm Nearshore Ridge Complex S SIDE
## 6 tt23 Inner Reef S PORI
## 7 dca Middle Reef N MADR,PORI
## 8 tt23 Middle Reef N AGAR,MYCE,SINT
## 9 dca Outer Reef N DSTO,SIDE
## 10 tt23 Outer Reef N MADR,SINT
# Ensure dataset is character so we can do < comparison
draws_clean <- posterior_draws %>%
mutate(dataset = as.character(dataset))
# One self-join to get all unique dataset pairs
pairwise_contrasts <- draws_clean %>%
rename(dataset1 = dataset, epred1 = .epred) %>%
inner_join(
draws_clean %>% rename(dataset2 = dataset, epred2 = .epred),
by = c(".draw", "taxon", "dir", "Type")
) %>%
filter(dataset1 < dataset2) %>% # Avoid duplicates and self-pairs
mutate(diff = epred1 - epred2)
# Step: summarize the posterior differences
summary_contrasts <- pairwise_contrasts %>%
group_by(taxon, dir, Type, dataset1, dataset2) %>%
summarize(mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
prob_diff = mean(diff > 0),
.groups = "drop")
out <- summary_contrasts %>%
filter(prob_diff < 0.01) %>%
arrange(taxon, Type, dir)
out
## # A tibble: 33 × 9
## taxon dir Type dataset1 dataset2 mean_diff lower upper prob_diff
## <chr> <chr> <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ACER S Nearshore… dca drm -0.0250 -0.0515 -0.00722 0.003
## 2 AGAR S Middle Re… dca tt23 -0.0443 -0.0950 -0.0111 0.004
## 3 AGAR N Outer Reef dca tt21 -0.0734 -0.164 -0.0178 0.002
## 4 DSTO N Nearshore… dca tt23 -0.0276 -0.0507 -0.0106 0.001
## 5 FAVI S Nearshore… dca drm -0.0318 -0.0602 -0.00928 0.002
## 6 MADR N Outer Reef dca tt21 -0.0930 -0.230 -0.00984 0.005
## 7 MCAV N Nearshore… dca tt23 -0.125 -0.192 -0.0725 0
## 8 MCAV N Nearshore… drm tt23 -0.133 -0.201 -0.0762 0
## 9 MCAV N Inner Reef dca tt23 -0.390 -0.862 -0.118 0
## 10 MCAV N Middle Re… dca drm -0.177 -0.349 -0.0333 0.007
## # ℹ 23 more rows
# Proportion of transect area per dataset × Type × dir
survey_weights <- dfftaxon %>%
group_by(dataset, Type, dir) %>%
summarize(area = sum(transect_area_m2), .groups = "drop") %>%
group_by(dataset) %>%
mutate(weight = area / sum(area)) %>%
select(dataset, Type, dir, weight)
posterior_weighted <- posterior_draws %>%
mutate(dataset = as.character(dataset)) %>%
left_join(survey_weights, by = c("dataset", "Type", "dir")) %>%
mutate(weighted_epred = .epred * weight)
posterior_totals <- posterior_weighted %>%
group_by(.draw, dataset, taxon) %>%
summarize(total_epred = sum(weighted_epred), .groups = "drop")
pairwise_overall <- posterior_totals %>%
rename(dataset1 = dataset, epred1 = total_epred) %>%
inner_join(
posterior_totals %>% rename(dataset2 = dataset, epred2 = total_epred),
by = c(".draw", "taxon")
) %>%
filter(dataset1 < dataset2) %>%
mutate(diff = epred1 - epred2)
summary_overall <- pairwise_overall %>%
group_by(taxon, dataset1, dataset2) %>%
summarize(mean_diff = mean(diff),
lower = quantile(diff, 0.025),
upper = quantile(diff, 0.975),
prob_diff = mean(diff > 0),
.groups = "drop")
summary_overall %>%
filter(upper < 0 | lower > 0) %>%
print(n = nrow(.))
## # A tibble: 44 × 7
## taxon dataset1 dataset2 mean_diff lower upper prob_diff
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 ACER dca drm -0.0137 -0.0241 -0.00529 0
## 2 ACER dca tt21 0.00646 0.00349 0.00952 1
## 3 ACER drm tt21 0.0201 0.0117 0.0309 1
## 4 AGAR dca tt21 -0.0132 -0.0323 -0.000357 0.022
## 5 AGAR drm tt21 -0.0174 -0.0365 -0.00223 0.014
## 6 DSTO dca drm -0.0121 -0.0230 -0.00333 0.002
## 7 DSTO dca tt23 -0.00699 -0.0149 -0.000124 0.023
## 8 EFAS dca tt23 -0.00410 -0.00849 -0.000950 0.006
## 9 FAVI dca drm -0.0223 -0.0372 -0.0106 0
## 10 FAVI dca tt23 0.00835 0.00211 0.0141 0.993
## 11 FAVI drm tt21 0.0231 0.00725 0.0403 0.993
## 12 FAVI drm tt23 0.0307 0.0188 0.0449 1
## 13 MADR dca drm 0.0267 0.0164 0.0378 1
## 14 MADR drm tt21 -0.0481 -0.0858 -0.0239 0
## 15 MADR drm tt23 -0.0176 -0.0275 -0.00691 0.004
## 16 MADR tt21 tt23 0.0305 0.00666 0.0684 0.997
## 17 MCAV dca tt23 -0.0488 -0.0980 -0.00167 0.023
## 18 MCAV drm tt23 -0.0697 -0.121 -0.0173 0.005
## 19 MEAN dca drm 0.0215 0.0142 0.0282 1
## 20 MEAN dca tt21 0.0183 0.00805 0.0268 0.997
## 21 MEAN dca tt23 0.0162 0.00873 0.0234 1
## 22 MUSS dca drm -0.00442 -0.00980 -0.000253 0.013
## 23 MUSS dca tt21 -0.00600 -0.0153 -0.000424 0.019
## 24 MUSS dca tt23 0.00210 0.0000473 0.00408 0.975
## 25 MUSS drm tt23 0.00653 0.00263 0.0118 1
## 26 MUSS tt21 tt23 0.00810 0.00265 0.0176 0.999
## 27 MYCE dca tt23 -0.00309 -0.00683 -0.000279 0.017
## 28 ORBI dca tt21 -0.00825 -0.0174 -0.00225 0
## 29 ORBI dca tt23 0.00145 0.0000286 0.00297 0.976
## 30 ORBI drm tt23 0.00390 0.000635 0.00942 0.993
## 31 ORBI tt21 tt23 0.00970 0.00388 0.0191 1
## 32 PORI dca drm -0.127 -0.207 -0.0520 0
## 33 PORI drm tt23 0.115 0.0250 0.210 0.989
## 34 SIDE dca drm -0.970 -1.29 -0.689 0
## 35 SIDE dca tt23 -0.184 -0.381 -0.00106 0.023
## 36 SIDE drm tt21 0.713 0.294 1.11 0.999
## 37 SIDE drm tt23 0.786 0.474 1.12 1
## 38 SINT dca drm 0.0842 0.0313 0.134 0.997
## 39 SINT drm tt21 -0.162 -0.288 -0.0662 0
## 40 SINT drm tt23 -0.131 -0.193 -0.0707 0
## 41 SOLE dca drm -0.0172 -0.0327 -0.00491 0.002
## 42 SOLE dca tt23 -0.0147 -0.0245 -0.00511 0.002
## 43 SOLE drm tt21 0.0180 0.000864 0.0355 0.977
## 44 SOLE tt21 tt23 -0.0155 -0.0280 -0.000749 0.02
impact_zones <- st_read("data/Impact_zones.kml") %>%
st_zm(drop = TRUE, what = "ZM") %>%
st_make_valid() %>%
st_transform(st_crs(polygons_clean))
## Reading layer `Impact_zones' from data source
## `/Users/rosscunning/Projects/PENIP/data/Impact_zones.kml' using driver `KML'
## Simple feature collection with 9 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY, XYZ
## Bounding box: xmin: -80.10538 ymin: 26.08289 xmax: -80.08298 ymax: 26.1069
## z_range: zmin: -21.40724 zmax: 6.930363e-14
## Geodetic CRS: WGS 84
impact_zones <- impact_zones %>%
rename(ImpactZone = Name) # or whatever column contains zone names
library(ggplot2)
impact_zones_plot <- impact_zones %>%
mutate(linetype_group = "Impact Zone")
ggplot() +
# Habitat polygons
geom_sf(data = polygons_clean, aes(fill = Type), color = "black", size = 0.2, alpha = 0.6) +
# Impact zones with dummy linetype for legend
geom_sf(data = impact_zones_plot, aes(linetype = linetype_group),
fill = NA, color = "black", linewidth = 0.6, show.legend = TRUE) +
# Color scales
scale_fill_brewer(palette = "Set3", na.value = "gray80") +
scale_linetype_manual(name = "", values = c("Impact Zone" = "dashed")) +
# Theme and layout
theme_minimal() +
labs(title = "Habitat Polygons within Impact Zones", fill = "Type") +
theme(legend.position = "right") +
xlim(-80.11, -80.079) +
ylim(26.08, 26.11)
# Spatial intersection of habitat polygons with impact zones
habitat_in_zones <- st_intersection(polygons_clean, impact_zones)
# Use a projected CRS for accurate area (e.g., UTM Zone 17N for South Florida)
habitat_in_zones_proj <- habitat_in_zones %>%
st_transform(32617) %>%
mutate(area_m2 = st_area(geometry))
# Adjust column names depending on your Impact Zones KML
area_summary <- habitat_in_zones_proj %>%
st_drop_geometry() %>%
group_by(Type, ImpactZone) %>%
summarize(total_area_m2 = sum(as.numeric(area_m2)), .groups = "drop")
area_summary[area_summary$Type == "Artificial", "Type"] <- "Nearshore Ridge Complex"
area_summary <- area_summary %>%
group_by(Type, ImpactZone) %>%
summarize(total_area_m2 = sum(total_area_m2))
area_summary <- area_summary %>%
mutate(ImpactZone = factor(ImpactZone,
levels = c("Channel", "Side Slopes", "Scenario 2, > 10 cm", "Scenario 2, 5.1-10 cm",
"Scenario 4, 1.1-5 cm", "Scenario 4, 0.51-1 cm", "Scenario 4, 0.1-0.5 cm")))
totals <- left_join(total_abund, area_summary, by = "Type") %>%
mutate(tot_estimate = fit_mean * total_area_m2) %>%
mutate(Type = factor(Type, levels = type_levels)) %>%
select(dataset, Type, ImpactZone, tot_estimate)
# Define ordered impact zone levels
impact_levels <- c("Channel", "Side Slopes", "Scenario 2, > 10 cm", "Scenario 2, 5.1-10 cm",
"Scenario 4, 1.1-5 cm", "Scenario 4, 0.51-1 cm", "Scenario 4, 0.1-0.5 cm")
# Ensure correct factor order
totals <- totals %>%
mutate(ImpactZone = factor(ImpactZone, levels = impact_levels))
# Cumulative summaries across increasing levels
cumulative_zones <- seq_along(impact_levels) %>%
map_dfr(function(i) {
zone_subset <- impact_levels[1:i]
totals %>%
drop_na(dataset) %>%
filter(ImpactZone %in% zone_subset) %>%
group_by(dataset, Type) %>%
summarize(tot = sum(tot_estimate), .groups = "drop") %>%
pivot_wider(names_from = dataset, values_from = tot) %>%
mutate(zone_group = paste0("z", i))
})
cumulative_zones %>%
filter(zone_group == "z7") %>%
pivot_longer(2:5, names_to = "dataset") %>%
mutate(dataset = factor(dataset, levels = dataset_levels)) %>%
ggplot(aes(x = Type, y = value, fill = dataset)) +
geom_col(position = position_dodge()) +
scale_fill_discrete(labels = dataset_labels) +
labs(x = "", y = "Total corals", title = "Total corals in all impact zones, by habitat")
cumulative_zones %>%
filter(zone_group == "z7") %>%
pivot_longer(2:5, names_to = "dataset") %>%
group_by(dataset) %>%
summarize(value = sum(value, na.rm = T)) %>%
mutate(dataset = factor(dataset, levels = dataset_levels)) %>%
ggplot(aes(x = 1, y = value, fill = dataset)) +
geom_col(position = position_dodge()) +
scale_fill_discrete(labels = dataset_labels) +
labs(x = "", y = "Total corals", title = "Total corals in all impact zones - all habitats")